
// Start with empty memory and work relative to the replication root.
clear
cd "D:\data_replication"

use estimation\1_data_format\data_base.dta, clear

// Keep pre-2006 rows reported by declarant 6
// (presumably the UK, judging by the "_uk" output file names -- confirm).
keep if year < 2006 & declarant == 6

// Domestic share: exp_share on rows where declarant and partner coincide,
// averaged within product_id. Products without such a row get missing.
gen dom_share_temp = exp_share if declarant == partner
bysort product_id: egen dom_share = mean(dom_share_temp)

// Collapse to one row per product_id.
by product_id: keep if _n == 1
keep pc8plus dom_share

// 1 = product has no domestic share, 0 = a share is available.
gen no_dom_pc8plus = (dom_share == .)

// pc is the code finally assigned to each product; it starts as the full
// pc8plus code and is overwritten by shorter prefixes in later steps.
gen pc = pc8plus
// 344  (original author's count; what it counts is not stated in this file)

// Code length: 8-character codes are truncated to prefixes below, any other
// length is carried over unchanged unless an explicit mapping is listed.
gen ll = length(pc8plus)

// 7-character grouping code.
gen pc7 = cond(ll == 8, substr(pc8plus, 1, 7), pc8plus)
// Hand-assigned 7-character codes for specific non-8-character codes.
replace pc7 = "2613123" if pc8plus == "31.2007"
replace pc7 = "3150310" if pc8plus == "74.2007"
replace pc7 = "1821225" if pc8plus == "43.2005"

// Within each pc7 group: average of the available domestic shares and the
// number of member products that have no share of their own.
bysort pc7: egen dom_share_pc7 = mean(dom_share)
by pc7: egen replace_pc7 = total(no_dom_pc8plus)

// 1 = the whole pc7 group has no domestic share.
gen no_dom_pc7 = (dom_share_pc7 == .)

// Assign the pc7 code to every product in a group that contains at least one
// product without a share, provided the group itself has a share.
replace pc = pc7 if replace_pc7 > 0 & !no_dom_pc7
// 259  (original author's count; what it counts is not stated in this file)

// 6-character grouping code: prefix of 8-character codes, other codes are
// carried over unchanged unless explicitly mapped below.
gen pc6 = cond(ll == 8, substr(pc8plus, 1, 6), pc8plus)
// Hand-assigned 6-character codes for specific non-8-character codes.
replace pc6 = "182122" if pc8plus == "43.2005"
replace pc6 = "182235" if pc8plus == "62.2005"
replace pc6 = "292212" if pc8plus == "3.2007"
replace pc6 = "261312" if pc8plus == "31.2007"
replace pc6 = "268211" if pc8plus == "33.2007"
replace pc6 = "296014" if pc8plus == "37.2007"
replace pc6 = "315031" if pc8plus == "74.2007"

// Within each pc6 group: average domestic share and the number of member
// products whose pc7 group had no share.
bysort pc6: egen dom_share_pc6 = mean(dom_share)
by pc6: egen replace_pc6 = total(no_dom_pc7)

// 1 = the whole pc6 group has no domestic share.
gen no_dom_pc6 = (dom_share_pc6 == .)

// Assign the pc6 code to groups that contain a product unresolved at the
// pc7 level, provided the pc6 group has a share.
replace pc = pc6 if replace_pc6 > 0 & !no_dom_pc6
// 92  (original author's count; what it counts is not stated in this file)

// 5-character grouping code: prefix of 8-character codes, other codes are
// carried over unchanged unless explicitly mapped below.
gen pc5 = cond(ll == 8, substr(pc8plus, 1, 5), pc8plus)
// Hand-assigned 5-character codes for specific non-8-character codes.
replace pc5 = "18212" if pc8plus == "43.2005"
replace pc5 = "18223" if pc8plus == "62.2005"
replace pc5 = "29221" if pc8plus == "3.2007"
replace pc5 = "33403" if pc8plus == "7.2007"
replace pc5 = "26131" if pc8plus == "31.2007"
replace pc5 = "26821" if pc8plus == "33.2007"
replace pc5 = "29601" if pc8plus == "37.2007"
replace pc5 = "15332" if pc8plus == "64.2007"
replace pc5 = "31503" if pc8plus == "74.2007"
// Pool groups 13101 and 13201 into 14111. This must run after the
// assignments above because it tests the current value of pc5.
replace pc5 = "14111" if inlist(pc5, "13101", "13201")

// Within each pc5 group: average domestic share and the number of member
// products whose pc6 group had no share.
bysort pc5: egen dom_share_pc5 = mean(dom_share)
by pc5: egen replace_pc5 = total(no_dom_pc6)

// 1 = the whole pc5 group has no domestic share.
gen no_dom_pc5 = (dom_share_pc5 == .)

// Assign the pc5 code to groups that contain a product unresolved at the
// pc6 level, provided the pc5 group has a share.
replace pc = pc5 if replace_pc5 > 0 & !no_dom_pc5
// 23  (original author's count; what it counts is not stated in this file)

// 4-character grouping code: prefix of 8-character codes, other codes are
// carried over unchanged unless explicitly mapped below.
gen pc4 = cond(ll == 8, substr(pc8plus, 1, 4), pc8plus)
// Hand-assigned 4-character codes for specific non-8-character codes.
replace pc4 = "1821" if pc8plus == "43.2005"
replace pc4 = "1822" if pc8plus == "62.2005"
replace pc4 = "2922" if pc8plus == "3.2007"
replace pc4 = "3340" if pc8plus == "7.2007"
replace pc4 = "2613" if pc8plus == "31.2007"
replace pc4 = "2682" if pc8plus == "33.2007"
replace pc4 = "2960" if pc8plus == "37.2007"
replace pc4 = "1533" if pc8plus == "64.2007"
replace pc4 = "3150" if pc8plus == "74.2007"
// Products pooled into pc5 group 14111 get the matching 4-character code.
replace pc4 = "1411" if pc5 == "14111"

// Within each pc4 group: average domestic share and the number of member
// products whose pc5 group had no share.
bysort pc4: egen dom_share_pc4 = mean(dom_share)
by pc4: egen replace_pc4 = total(no_dom_pc5)

// 1 = the whole pc4 group has no domestic share.
gen no_dom_pc4 = (dom_share_pc4 == .)

// Assign the pc4 code to groups that contain a product unresolved at the
// pc5 level, provided the pc4 group has a share.
replace pc = pc4 if replace_pc4 > 0 & !no_dom_pc4
// 13, only synthetic

// 3-character grouping code: prefix of 8-character codes, other codes are
// carried over unchanged unless explicitly mapped below.
gen pc3 = cond(ll == 8, substr(pc8plus, 1, 3), pc8plus)
// Hand-assigned 3-character codes for specific non-8-character codes.
replace pc3 = "182" if pc8plus == "43.2005"
replace pc3 = "182" if pc8plus == "62.2005"
replace pc3 = "292" if pc8plus == "3.2007"
replace pc3 = "334" if pc8plus == "7.2007"
replace pc3 = "261" if pc8plus == "31.2007"
replace pc3 = "268" if pc8plus == "33.2007"
replace pc3 = "296" if pc8plus == "37.2007"
replace pc3 = "153" if pc8plus == "64.2007"
replace pc3 = "315" if pc8plus == "74.2007"
// Products pooled into pc5 group 14111 get the matching 3-character code.
replace pc3 = "141" if pc5 == "14111"

// Within each pc3 group: average domestic share and the number of member
// products whose pc4 group had no share.
bysort pc3: egen dom_share_pc3 = mean(dom_share)
by pc3: egen replace_pc3 = total(no_dom_pc4)

// 1 = the whole pc3 group has no domestic share.
gen no_dom_pc3 = (dom_share_pc3 == .)

// Assign the pc3 code to groups that contain a product unresolved at the
// pc4 level, provided the pc3 group has a share.
replace pc = pc3 if replace_pc3 > 0 & !no_dom_pc3
// 13, only synthetic

// 2-character grouping code: prefix of 8-character codes, other codes are
// carried over unchanged unless explicitly mapped below.
gen pc2 = cond(ll == 8, substr(pc8plus, 1, 2), pc8plus)
// Hand-assigned 2-character codes for specific non-8-character codes.
replace pc2 = "18" if pc8plus == "43.2005"
replace pc2 = "18" if pc8plus == "62.2005"
replace pc2 = "29" if pc8plus == "3.2007"
replace pc2 = "33" if pc8plus == "7.2007"
replace pc2 = "26" if pc8plus == "31.2007"
replace pc2 = "26" if pc8plus == "33.2007"
replace pc2 = "29" if pc8plus == "37.2007"
replace pc2 = "15" if pc8plus == "64.2007"
replace pc2 = "31" if pc8plus == "74.2007"
// Products pooled into pc5 group 14111 get the matching 2-character code.
replace pc2 = "14" if pc5 == "14111"

// Within each pc2 group: average domestic share and the number of member
// products whose pc3 group had no share.
bysort pc2: egen dom_share_pc2 = mean(dom_share)
by pc2: egen replace_pc2 = total(no_dom_pc3)

// 1 = the whole pc2 group has no domestic share.
gen no_dom_pc2 = (dom_share_pc2 == .)

// Assign the pc2 code to groups that contain a product unresolved at the
// pc3 level, provided the pc2 group has a share.
replace pc = pc2 if replace_pc2 > 0 & !no_dom_pc2
// 13, only synthetic

// Snapshot the full working data; it is reloaded later to build the second
// output file.
save estimation\1_product_list\output\A6_data_uk_temp.dta, replace


// Mark non-8-character codes whose assigned pc differs from the code itself,
// then flag every pc group that contains at least one such code.
gen pc8plus_d = (ll != 8 & pc8plus != pc)
bysort pc: egen pc8plus_comp = max(pc8plus_d)

// Output 1: the non-8-character codes inside flagged groups, paired with
// the pc code assigned to them.
keep if pc8plus_comp == 1 & ll != 8
keep pc pc8plus
order pc pc8plus
save estimation\1_product_list\output\A6_pc_comp_uk.dta, replace


// Reload the snapshot of the full working data.
// `use` accepts `clear` (not `replace`) to discard the data in memory; with
// `replace` Stata stops with "option replace not allowed".
use estimation\1_product_list\output\A6_data_uk_temp.dta, clear

// Mark non-8-character codes whose assigned pc differs from the code itself,
// then flag every pc group that contains at least one such code.
gen pc8plus_d = 0
replace pc8plus_d = 1 if ll != 8 & pc8plus != pc
bysort pc: egen pc8plus_comp = max(pc8plus_d)

// Output 2: one row per pc code among the groups that were NOT flagged.
drop if pc8plus_comp == 1
bysort pc: keep if _n == 1
keep pc
save estimation\1_product_list\output\A6_pc_uk.dta, replace

// Remove the intermediate snapshot now that both outputs are written.
erase estimation\1_product_list\output\A6_data_uk_temp.dta
